### Creates tables with results from nationally representative surveys (Study #1, Table 2)
### Reads the data from each country, runs the same regressions, assemble results 

library(foreign)
library(car)
library(xtable)
library(plm)
library(sandwich)
library(xtable)

### Declare some functions ####
stack.regs <- function(x,SE=T){##TO create a table of coefficients
	require(reshape)
	if(class(x)=="lm"){x<-summary(x)}
	if(class(x)!="summary.lm"){cat("x should be a lm object\n");break}
	R2 <- x$r.sq
	N <- length(x$residuals)
	x <- as.data.frame(x$coefficients)
	x$vars <- rownames(x)	
	out <- melt.data.frame(x, "vars", variable_name = "desc")
	out <- subset(out,is.element(desc,unique(out$desc)[-3]))
	out <- out[order(out$vars),]
	names(out)[3]<-"v"
	out$v <- round(as.numeric(out$v),3)
	rownames(out)<-1:nrow(out)
	tmp <- data.frame(vars=c("R2","N"),desc=c("",""),v=c(R2,N))
	out <- rbind(out,tmp)
	out$desc <- car::recode(out$desc,"'Pr(>|t|)'='zP-value'")
	return(out)
	}

stack.regs.robust <- function(x,SE=F){##TO create a table of coefficients
	require(reshape)
		require(car)
	if(class(x)!="lm"){cat("x should be a summary lm object\n");break}
	#Robust SE and pvals,  almost identical to p-values from t-tests					
			robSE <- sqrt(diag(vcovHC(x,type="HC2")))   #  "HC4m")))
			terms <- length(coef(x))
			robP <- rep(NA,terms)
			for(jj in 1:terms){
			robP[jj] <- linearHypothesis(x,diag(terms)[jj,],vcov=vcovHC(x,type="HC2"))$Pr[2]}	
	x<-summary(x)		
	R2 <- x$r.sq
	N <- length(x$residuals)
	x <- as.data.frame(x$coefficients)
	x$'Std. Error'<-robSE
	x$'Pr(>|t|)'<-round(robP,3)
	x$vars <- rownames(x)	
	out <- melt.data.frame(x, "vars", variable_name = "desc")
	if(SE){out<- subset(out,is.element(desc,unique(out$desc)[-3]))#not report tvals
	 }else{out <- subset(out,is.element(desc,unique(out$desc)[-c(2,3)]))} #not report tvals& SE
	out <- out[order(out$vars),]
	names(out)[3]<-"v"
	out$v <- round(as.numeric(out$v),3)
	rownames(out)<-1:nrow(out)
	tmp <- data.frame(vars=c("R2","N"),desc=c("",""),v=c(R2,N))
	out <- rbind(out,tmp)
	out$desc <- car::recode(out$desc,"'Pr(>|t|)'='zP-value'")
	return(out)
	}

rob.P <- function(x){#computes p-values using robust SE
		require(car)
		require(sandwich)
			require(plm)
		terms <- length(coef(x))
		robP <- rep(NA,terms)
		for(jj in 1:terms){
			robP[jj] <- linearHypothesis(x,diag(terms)[jj,],vcov=vcovHC(x,type="HC2"))$Pr[2]}
		return(robP)}
		
robse <- function(x,sig=0.95){
	require(sandwich)
	require(plm)
	robSE <- sqrt(diag(vcovHC(x,type="HC2")))
	terms <- length(coef(x))
	robP <- rep(NA,terms)
	for(jj in 1:terms){
			robP[jj] <- linearHypothesis(x,diag(terms)[jj,],vcov=vcovHC(x,type="HC2"))$Pr[2]}
	upr <- coef(x) + qnorm(sig) * robSE
	lwr <- coef(x) - qnorm(sig) * robSE
	out <- cbind(robse=robSE,upr=upr,lwr=lwr,robpval=robP)
	return(out)
}

star <- function(x){
	x$st <- NA
	to.star <- grep("P-value",x$desc)
	stars <- ifelse(x$v[to.star]<0.01,"***",
			 ifelse(x$v[to.star]<0.05,"**",
			 ifelse(x$v[to.star]<0.1,"*","")))
	x$st[to.star-1] <- stars 
	return(x)
}

# Read the formated data set for each country
load("S1-SurveyBrazil.RData")
db <- d
load("S1-SurveyTurkey.RData")
dt <- d
rm(d)

# Regressions for Brazil
db$ses <- db$CLASSE
db$uprclass <- db$CLASSE!="Classe D/ E"&db$CLASSE!="Classe C2"
db$poorest <- db$CLASSE=="Classe D/ E"
db$OUTraw <- db$OUT #keep the unstandardized 
db$OUT <- scale(db$OUT) #standardize the variable
regall <- lm(OUT~Tcond,data=db)
regallC <-lm(OUT~Tcond+
			SEXO+IDADE+CLASSE+RACA,data=db)
regupr <- lm(OUT~Tcond,data=subset(db,uprclass==T))
reguprC <-lm(OUT~Tcond+
			SEXO+IDADE+CLASSE+RACA,data=subset(db,uprclass==T))
reglwr <- lm(OUT~Tcond,data=subset(db,uprclass==F))
reglwrC <- lm(OUT~Tcond+
			SEXO+IDADE+CLASSE+RACA,data=subset(db,uprclass==F))
regnotpoorest <- lm(OUT~Tcond,data=subset(db,poorest==F))
regnotpoorestC <-lm(OUT~Tcond+
			SEXO+IDADE+CLASSE+RACA,data=subset(db,poorest==F))
regpoorest <- lm(OUT~Tcond,data=subset(db,poorest==T))
regpoorestC<- lm(OUT~Tcond+
			SEXO+IDADE+RACA,data=subset(db,poorest==T))
regmidest <- lm(OUT~Tcond,data=subset(db,poorest==F&uprclass==F))
regmidestC <- lm(OUT~Tcond+
			SEXO+IDADE+RACA,data=subset(db,poorest==F&uprclass==F))

regsb <- list(regall,regallC,regupr,reguprC,reglwr,reglwrC,regnotpoorest,regnotpoorestC,
regpoorest,regpoorestC,regmidest,regmidestC)


# Regressions for Turkey
dt$uprclass <- dt$ses!="DE"&dt$ses!="C2"
dt$poorest <- dt$ses=="DE"
dt$OUTraw <- dt$OUT #keep the unstandardized 
dt$OUT <- scale(dt$OUT) #standardize the variable
regall <- lm(OUT~Tcond,data=dt,weights=dt$weight)
regallC <-lm(OUT~Tcond+
			female+age+ses,data=dt,weights=dt$weight)
regupr <- lm(OUT~Tcond,data=subset(dt,uprclass==T),weights=dt$weight[dt$uprclass==T])
reguprC <-lm(OUT~Tcond+
			female+age+ses,data=subset(dt,uprclass==T),weights=dt$weight[dt$uprclass==T])
reglwr <- lm(OUT~Tcond,data=subset(dt,uprclass==F),weights=dt$weight[dt$uprclass==F])
reglwrC <- lm(OUT~Tcond+
			female+age+ses,data=subset(dt,uprclass==F),weights=dt$weight[dt$uprclass==F])
regnotpoorest <- lm(OUT~Tcond,data=subset(dt,poorest==F))
regnotpoorestC <-lm(OUT~Tcond+
			female+age+ses,data=subset(dt,poorest==F))
regpoorest <- lm(OUT~Tcond,data=subset(dt,poorest==T))
regpoorestC<- lm(OUT~Tcond+
			female+age,data=subset(dt,poorest==T))
regmidest <- lm(OUT~Tcond,data=subset(dt,poorest==F&uprclass==F))
regmidestC <- lm(OUT~Tcond+
			female+age,data=subset(dt,poorest==F&uprclass==F))

regst <- list(regall,regallC,regupr,reguprC,reglwr,reglwrC,regnotpoorest,regnotpoorestC,
regpoorest,regpoorestC,regmidest,regmidestC)
	
### Examine definitions of "better off" in each country
buprshare <- round(100*prop.table(table(db$uprclass))[2],1)
tuprshare <- round(100*weights::wpct(dt$uprclass,dt$weight)[1],1)

### Examine definitions of "poorest" in each country
buprshare <- round(100*prop.table(table(db$poorest))[2],1)
tuprshare <-  round(100*weights::wpct(dt$poorest)[1],1)

### Create very abbreviated regression table (Table 2)
### Take the same regression for each country and assemble a table
### Keep only the coefficient on the treatment, significance (p-value indicator)

### Stack results for all respondents, with and without controls, for each country
table01 <- 	merge(
		merge(star(stack.regs.robust(regsb[[1]])),star(stack.regs.robust(regsb[[2]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
		merge(star(stack.regs.robust(regst[[1]])),star(stack.regs.robust(regst[[2]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
			by=c("vars","desc"),suffixes=c("Brazil","Turkey"))[5,]
	
### Stack high SES respondents, for each country, with and without controls
table02 <-  merge(
		merge(star(stack.regs.robust(regsb[[3]])),star(stack.regs.robust(regsb[[4]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
		merge(star(stack.regs.robust(regst[[3]])),star(stack.regs.robust(regst[[4]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
			by=c("vars","desc"),suffixes=c("Brazil","Turkey"))[5,]

### Stack responses for regressions excluding poorest respondents, for each country, with and without controls
table03 <-   merge(
		merge(star(stack.regs.robust(regsb[[7]])),star(stack.regs.robust(regsb[[8]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
		merge(star(stack.regs.robust(regst[[7]])),star(stack.regs.robust(regst[[8]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
			by=c("vars","desc"),suffixes=c("Brazil","Turkey"))[5,]

### Stack responses for regression of pooorest respondents
table04 <-   merge(
		merge(star(stack.regs.robust(regsb[[9]])),star(stack.regs.robust(regsb[[10]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
		merge(star(stack.regs.robust(regst[[9]])),star(stack.regs.robust(regst[[10]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
			by=c("vars","desc"),suffixes=c("Brazil","Turkey"))[5,]


### Stack responses for regression of middle respondents
table05 <-   merge(
		merge(star(stack.regs.robust(regsb[[11]])),star(stack.regs.robust(regsb[[12]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
		merge(star(stack.regs.robust(regst[[11]])),star(stack.regs.robust(regst[[12]]))
			,by=c("vars","desc"),suffixes=c("No","Yes")),
			by=c("vars","desc"),suffixes=c("Brazil","Turkey"))[5,]

### Produce the Table 2 in the paper
regtab <- rbind(table01,table04,table05,table02)
regtab$vars <- regtab$desc <- NULL
rownames(regtab) <- c("All Respondents","Lowest SES","Middle SES","High SES")
xregtab <-  xtable(regtab,digits=3)
caption(xregtab) <- "Stanrdarized Average and Heterogenous Treatment Effects --- Study #1"
label(xregtab) <- "tabstudy1"
print(xregtab ,caption.placement="top",include.rownames=T)

